{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <center> Apply backpropagation algorithm to wheet seeds datasets</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_dir = '/home/lidong/Datasets/ML' "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_dataset_from_file(path):\n",
    "    inputs = []\n",
    "    targets = []\n",
    "    with open(path) as f:\n",
    "        for line in f:\n",
    "            row = line.rstrip('\\n').split()\n",
    "            inputs.append(list(map(lambda x: float(x.strip()), row[:-1])))\n",
    "            targets.append(int(row[-1].strip()))\n",
    "            \n",
    "    # classify by raw values\n",
    "    class_values = set(targets)\n",
    "    class_indexs = dict()\n",
    "    for i, value in enumerate(class_values):\n",
    "        class_indexs[value] = i\n",
    "        \n",
    "    # one-hot\n",
    "    dataset = []\n",
    "    num_onehot = len(class_values)\n",
    "    for i in range(len(targets)):\n",
    "        values = [0 for i in range(num_onehot)]\n",
    "        values[class_indexs[targets[i]]] = 1\n",
    "        dataset.append((inputs[i], values))\n",
    "    return np.array(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def normalize_dataset(dataset):\n",
    "    inputs = dataset[:, 0]\n",
    "    minmax = [[min(column), max(column)] for column in zip(*inputs)]\n",
    "    columns = len(minmax)\n",
    "    for row in inputs:\n",
    "        for i in range(columns):\n",
    "            row[i] = round((row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0]), 4)\n",
    "    return dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Activitation function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def activite_transfer(z):\n",
    "    # sigmoid\n",
    "    return 1.0/(1.0+np.exp(-z))\n",
    "\n",
    "def activite_derivative(z):\n",
    "    # the derivative of the sigmoid\n",
    "    return activite_transfer(z)*(1-activite_transfer(z))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def forward_propagate(network, x):\n",
    "    # 1d-array transpose\n",
    "    a = np.reshape(x, (len(x), 1))\n",
    "    zs = [] # the input layer, no activitation and z(weighted values)\n",
    "    activations = [a] # first for input layer\n",
    "    for b, w in zip(network['biases'], network['weights']):\n",
    "        z = np.dot(w, a) + b\n",
    "        zs.append(z)\n",
    "        a = activite_transfer(z)\n",
    "        activations.append(a)\n",
    "    return zs, activations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Backward propagate:\n",
    "\n",
    "--------------------------------\n",
    "\n",
    "$$ \n",
    "\\begin{align*} \n",
    "\\delta^L &= (a^L-y) \\odot \\sigma'(z^L) \\\\\n",
    "\\delta^l &= ((w^{l+1})^T \\delta^{l+1}) \\odot \\sigma'(z^l) \\\\\n",
    "\\end{align*}\n",
    "$$\n",
    "\n",
    "--------------------------------"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def backward_propagate(network, y, zs, activations):\n",
    "    # init nabla(delta) values of weights and biases for all layers except input layer\n",
    "    nabla_b = [np.zeros(b.shape) for b in network['biases']]\n",
    "    nabla_w = [np.zeros(w.shape) for w in network['weights']]\n",
    "    \n",
    "    target = np.reshape(y, (len(y), 1))\n",
    "    nlayers = network['nlayers']\n",
    "    # Using nagetive index ingeniously\n",
    "    for l in range(1, nlayers):\n",
    "        if l == 1:\n",
    "            delta = (activations[-l] - target) * activite_derivative(zs[-l])\n",
    "        else:\n",
    "            delta = np.dot(network['weights'][-l+1].transpose(), delta) * activite_derivative(zs[-l])\n",
    "        nabla_b[-l] = delta\n",
    "        nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())\n",
    "    return (nabla_b, nabla_w)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "--------------------------------\n",
    "\n",
    "$$ \n",
    "\\begin{align*} \n",
    "\\dfrac{\\partial C}{\\partial w^l} &= \\delta^l (a^{l-1})^T \\\\\n",
    "w^l &= w^l-\\frac{\\eta}{m} \\sum_x \\delta^{x,l} (a^{x,l-1})^T \\\\\n",
    "\\dfrac{\\partial C}{\\partial b^l_j} &= \\delta^l_j \\\\\n",
    "b^l &= b^l-\\frac{\\eta}{m} \\sum_x \\delta^{x,l}\n",
    "\\end{align*}\n",
    "$$\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def update_weights_biases(network, total, eta, nabla_b, nabla_w):\n",
    "    network[\"biases\"] = [b - (eta/total)*nb for b, nb in zip(network[\"biases\"], nabla_b)]\n",
    "    network[\"weights\"] = [w - (eta/total)*nw for w, nw in zip(network[\"weights\"], nabla_w)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "class NeuralNetwork(object):\n",
    "    def __init__(self, dataset, sizes):\n",
    "        self.dataset = dataset\n",
    "        self.sizes = sizes\n",
    "        self.network = self.initialize_network(sizes)\n",
    "        print(sizes)\n",
    "        # print(self.network)\n",
    "        \n",
    "    @staticmethod\n",
    "    def initialize_network(sizes):\n",
    "        network = dict()\n",
    "        network['nlayers'] = len(sizes)\n",
    "        #TODO input layer does not exist weigths and biases\n",
    "        network[\"biases\"] = [np.random.randn(y, 1) for y in sizes[1:]]\n",
    "        network[\"weights\"] = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]\n",
    "        return network\n",
    "        \n",
    "    def train(self, batch_size, eta, epochs):\n",
    "        # SGD stochastic gradient descent\n",
    "        N = len(dataset)\n",
    "        for epoch in range(epochs):\n",
    "            np.random.shuffle(self.dataset)\n",
    "            batches = [self.dataset[k:k+batch_size] for k in range(0, N, batch_size)]\n",
    "            for batch in batches:\n",
    "                nabla_b = [np.zeros(b.shape) for b in self.network['biases']]\n",
    "                nabla_w = [np.zeros(w.shape) for w in self.network['weights']]\n",
    "                for x, y in batch:\n",
    "                    # 1. feed forward\n",
    "                    zs, activations = forward_propagate(self.network, x)\n",
    "                    # 2. back propagation\n",
    "                    delta_nabla_b, delta_nabla_w = backward_propagate(self.network, y, zs, activations)\n",
    "                    nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]\n",
    "                    nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]\n",
    "                    \n",
    "                # 3. update weights and biases\n",
    "                update_weights_biases(self.network, len(batch), eta, nabla_b, nabla_w)\n",
    "        # test        \n",
    "        np.random.shuffle(self.dataset)\n",
    "        print(self.evaluate(self.dataset[:70]))\n",
    "            \n",
    "    def evaluate(self, test_data):\n",
    "        count = 0\n",
    "        for x, y in test_data:\n",
    "            _, results = forward_propagate(self.network, x)\n",
    "            if y[np.argmax(results[-1])]:\n",
    "                count += 1\n",
    "        return (float(count)/len(test_data))        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = load_dataset_from_file(os.path.join(dataset_dir, 'seeds_dataset.txt'))\n",
    "dataset = normalize_dataset(dataset)\n",
    "ninput = len(dataset[0, 0])\n",
    "nhiden = 5\n",
    "noutput = len(dataset[0, 1])\n",
    "sizes=(ninput, nhiden, noutput)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(7, 5, 3)\n",
      "0.9714285714285714\n"
     ]
    }
   ],
   "source": [
    "nw = NeuralNetwork(dataset, sizes)\n",
    "nw.train(70, 0.2, 300)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
